********************************************************************************
* Reset the session. -capture- swallows the error raised when no log is open,
* so the script can be re-run after an aborted earlier run.
capture log close
clear all
set more off

********************************************************************************
** File paths
** All paths are relative to the folder containing this do-file: make that
** folder Stata's current working directory before running.

* Defines the globals $input, $output and $temp as ../input, ../output, ../temp
foreach vFolder in input output temp {
	global `vFolder' "../`vFolder'"
}

** User-written packages needed by this script:
**   - wyoung, 1.0.2 14may2018 by Julian Reif
**   - estout (provides the estadd and esttab commands used further down)
**ssc install wyoung, replace
**ssc install estout, replace

********************************************************************************
* Open the log for this run inside the temp folder (overwrites an existing one)
log using "$temp/multiple_hypothesis_testing", replace

********************************************************************************
** Get data in cross section form
********************************************************************************
* Load the page-by-month panel and discard the Dutch-language ("nl") pages.
use $temp/pagelength_users_edits, clear
drop if vLanguage == "nl"

* Diagnostic only: reports whether page-month pairs are unique.
duplicates report vNumPageID vMonth

* Assemble the list of yearly outcome variables (six stubs x years 1-4),
* ordered stub by stub, so it does not have to be typed out twice.
local vYearlyVars
foreach vStub in vLogLengthMinusTreatmentYr vAverUsersYr vAverEditDaysYr vAverEditsNotTTYr vAverEditDistYr vAverEditDistDCapYr {
	forvalues vYr = 1/4 {
		local vYearlyVars `vYearlyVars' `vStub'`vYr'
	}
}

* Summary statistics before the length variables are filled in.
summarize `vYearlyVars'

** Length calculated for September while other measures for August:
** where the length variable is missing, copy in the value from the following
** month (F1. = one-period lead within page) so it sits on the same row as
** the other measures.
xtset vNumPageID vMonth
forvalues vYr = 1/4 {
	local vLenVar vLogLengthMinusTreatmentYr`vYr'
	replace `vLenVar' = F1.`vLenVar' if `vLenVar' == .
}

* Same summary again, to see the effect of the fill in the log.
summarize `vYearlyVars'

* Collapse the monthly panel to one row per page (vNumPageID) with the yearly
* outcomes in wide form.

* Keep only the August observations of 2015-2018.
keep if inlist(vMonth, tm(2015m8), tm(2016m8), tm(2017m8), tm(2018m8))
sort vPage vLanguage vMonth

* Stubs of the year-indexed variables (suffix 1-4); used for the variable
* selection and for both reshapes so the three lists cannot drift apart.
local vStubs vLogLengthMinusTreatmentYr vAverUsersYr vAverEditDaysYr vAverEditsNotTTYr vAverEditDistYr vAverEditDistDCapYr vAfterTreatment
local vWideVars
foreach vStub of local vStubs {
	forvalues vYr = 1/4 {
		local vWideVars `vWideVars' `vStub'`vYr'
	}
}

* Identifiers, treatment/fixed-effect variables, baseline length, and the
* year-indexed variables.
keep vNumPageID vPage vLanguage vMonth vTreatmentGroup vNumPage vNumLanguage vLogLength2014Aug `vWideVars'

* Long form: one row per page x month x vYear.
reshape long `vStubs', i(vNumPageID vMonth) j(vYear)

* Discard page-month-year rows that carry no length value.
* missing() is used instead of "== ." so that extended missing values
* (.a-.z) are dropped too; otherwise such rows would survive and leave
* several rows per page-year, which the reshape wide below rejects.
* NOTE(review): presumably each Yr<k> variable is populated only in the
* August of year k, so one row per page-year remains -- the tabulate below
* lets you confirm this in the log.
drop if missing(vLogLengthMinusTreatmentYr)
tabulate vMonth vYear
drop vMonth

* Back to wide form, now with a single row per page.
reshape wide `vStubs', i(vNumPageID) j(vYear)

********************************************************************************
* Attach the long-run length and quality change variables from the
* cross-section file, matching on page and language, and retain only pages
* present in both datasets (_merge == 3).
local vLongRunVars vDifLogLength_20182014 vDif_overall_32 vDifSimilarity_20182014 vDif_completeness_32 vDif_interesting_32 vDif_illustrated_32 vDif_wellwritten_32
merge 1:1 vPage vLanguage using $temp/data_cross_section, ///
	keepusing(`vLongRunVars')
drop if _merge != 3
drop _merge

********************************************************************************
** Only long term length and quality outcomes
********************************************************************************
* Family A: the seven long-run length and quality outcomes.
* Each outcome is regressed on the treatment indicator with language and
* page fixed effects; p-values on vTreatmentGroup are adjusted across the
* family using 10,000 bootstrap draws and a fixed seed.
local vFamilyA vDifLogLength_20182014 vDif_overall_32 vDifSimilarity_20182014 vDif_completeness_32 vDif_interesting_32 vDif_illustrated_32 vDif_wellwritten_32
wyoung `vFamilyA', ///
	cmd(regress OUTCOMEVAR vTreatmentGroup i.vNumLanguage i.vNumPage) ///
	familyp(vTreatmentGroup) ///
	bootstraps(10000) seed(10101)

* Copy the results out of r() and echo them to the log.
display "`r(table)'"
matrix mA = r(table)
matrix list mA

* One row per outcome (7), first four columns only. The table header below
* labels them Coef., SE, unadjusted p and adjusted p.
* NOTE(review): column positions assume the r(table) layout of wyoung 1.0.2
* -- confirm if the package is updated.
matrix mResultsA = mA[1..7, 1..4]
matrix list mResultsA

* Store the matrix in e() so esttab can tabulate it.
estadd matrix mResultsA = mResultsA, replace

* Write the table body as a LaTeX fragment.
esttab e(mResultsA, fmt(%9.3f)) using $output/tMultipleHypothesisTesting_LengthQuality.tex, ///
	replace label noobs plain compress fragment unstack varwidth(60) ///
	collabels(none) nomtitles ///
	prehead(`"Outcome variable &  Coef. &  SE &  Unadj. p-value &  Adj. p-value \\ & (1) & (2) & (3) & (4) \\\hline "') ///
	coeflabels( ///
		r1 "$\Delta$ log. length" ///
		r2 "$\Delta$ quality" ///
		r3 "$\Delta$ similarity" ///
		r4 "$\Delta$ complete" ///
		r5 "$\Delta$ interesting" ///
		r6 "$\Delta$ illustrated" ///
		r7 "$\Delta$ well-written" ///
	)

********************************************************************************
** Short term editing activity
********************************************************************************
* Family B: editing-activity outcomes for years 1 and 2.
* The outcome list is ordered stub by stub (year 1 then year 2 within each
* stub); this order determines the rows r1-r10 of the results matrix.
local vShortRun
foreach vStub in vAverUsersYr vAverEditDaysYr vAverEditsNotTTYr vAverEditDistYr vAverEditDistDCapYr {
	foreach vYr in 1 2 {
		local vShortRun `vShortRun' `vStub'`vYr'
	}
}

* Same specification as the other families: treatment indicator plus language
* and page fixed effects, 10,000 bootstrap draws, fixed seed.
wyoung `vShortRun', ///
	cmd(regress OUTCOMEVAR vTreatmentGroup i.vNumLanguage i.vNumPage) ///
	familyp(vTreatmentGroup) ///
	bootstraps(10000) seed(10101)

* Copy the results out of r() and echo them to the log.
display "`r(table)'"
matrix mB = r(table)
matrix list mB

* One row per outcome (10), first four columns only. The table header below
* labels them Coef., SE, unadjusted p and adjusted p.
* NOTE(review): column positions assume the r(table) layout of wyoung 1.0.2
* -- confirm if the package is updated.
matrix mResultsB = mB[1..10, 1..4]
matrix list mResultsB

* Store the matrix in e() so esttab can tabulate it.
estadd matrix mResultsB = mResultsB, replace

* Write the table body as a LaTeX fragment.
* NOTE(review): rows r3/r4 correspond to vAverEditDaysYr1/2 but are labelled
* "\# edits" -- confirm the label is the intended one.
esttab e(mResultsB, fmt(%9.3f)) using $output/tMultipleHypothesisTesting_ShortTermEditing.tex, ///
	replace label noobs plain compress fragment unstack varwidth(60) ///
	collabels(none) nomtitles ///
	prehead(`"Outcome variable &  Coef. &  SE &  Unadj. p-value &  Adj. p-value \\ & (1) & (2) & (3) & (4) \\\hline "') ///
	coeflabels( ///
		r1 "\# users: year 1" ///
		r2 "\# users: year 2" ///
		r3 "\# edits: year 1" ///
		r4 "\# edits: year 2" ///
		r5 "\# edits excl. treatment: year 1" ///
		r6 "\# edits excl. treatment: year 2" ///
		r7 "Edit distance: year 1" ///
		r8 "Edit distance: year 2" ///
		r9 "Capped edit distance: year 1" ///
		r10 "Capped edit distance: year 2" ///
	)

********************************************************************************
** Long term editing activity
********************************************************************************
* Family C: editing-activity outcomes for years 3 and 4.
* The outcome list is ordered stub by stub (year 3 then year 4 within each
* stub); this order determines the rows r1-r10 of the results matrix.
local vLongRun
foreach vStub in vAverUsersYr vAverEditDaysYr vAverEditsNotTTYr vAverEditDistYr vAverEditDistDCapYr {
	foreach vYr in 3 4 {
		local vLongRun `vLongRun' `vStub'`vYr'
	}
}

* Same specification as the other families: treatment indicator plus language
* and page fixed effects, 10,000 bootstrap draws, fixed seed.
wyoung `vLongRun', ///
	cmd(regress OUTCOMEVAR vTreatmentGroup i.vNumLanguage i.vNumPage) ///
	familyp(vTreatmentGroup) ///
	bootstraps(10000) seed(10101)

* Copy the results out of r() and echo them to the log.
display "`r(table)'"
matrix mC = r(table)
matrix list mC

* One row per outcome (10), first four columns only. The table header below
* labels them Coef., SE, unadjusted p and adjusted p.
* NOTE(review): column positions assume the r(table) layout of wyoung 1.0.2
* -- confirm if the package is updated.
matrix mResultsC = mC[1..10, 1..4]
matrix list mResultsC

* Store the matrix in e() so esttab can tabulate it.
estadd matrix mResultsC = mResultsC, replace

* Write the table body as a LaTeX fragment.
* NOTE(review): rows r3/r4 correspond to vAverEditDaysYr3/4 but are labelled
* "\# edits" -- confirm the label is the intended one.
esttab e(mResultsC, fmt(%9.3f)) using $output/tMultipleHypothesisTesting_LongTermEditing.tex, ///
	replace label noobs plain compress fragment unstack varwidth(60) ///
	collabels(none) nomtitles ///
	prehead(`"Outcome variable &  Coef. &  SE &  Unadj. p-value &  Adj. p-value \\ & (1) & (2) & (3) & (4) \\\hline "') ///
	coeflabels( ///
		r1 "\# users: year 3" ///
		r2 "\# users: year 4" ///
		r3 "\# edits: year 3" ///
		r4 "\# edits: year 4" ///
		r5 "\# edits excl. treatment: year 3" ///
		r6 "\# edits excl. treatment: year 4" ///
		r7 "Edit distance: year 3" ///
		r8 "Edit distance: year 4" ///
		r9 "Capped edit distance: year 3" ///
		r10 "Capped edit distance: year 4" ///
	)

********************************************************************************
* End of script: close the log file, then remove the dataset from memory.
log close 
clear
